package main

import (
	"fmt"
	"github.com/doumadou/mahonia"
	"io/ioutil"
	"net/http"
	"regexp"
)

// Get fetches url with an HTTP GET and returns the raw response body together
// with a status code.
//
// When the request completes, statusCode is the HTTP status reported by the
// server (200 for a normal page). Two negative sentinel values signal local
// failures: -100 when the request could not be performed and -200 when the
// response body could not be read. content holds whatever was read from the
// body and is nil when the request itself failed.
func Get(url string) (content []byte, statusCode int) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, -100
	}
	defer resp.Body.Close()

	content, err = ioutil.ReadAll(resp.Body)
	if err != nil {
		return content, -200
	}

	// Report the server's real status instead of assuming success, so callers
	// that compare against 200 do not mistake error pages (404, 500, ...) for
	// valid content.
	return content, resp.StatusCode
}

// charsetRe captures the encoding label of a charset declaration such as
// `charset=gb2312`, `charset=utf-8` or `charset="UTF-8"`. The label may contain
// hyphens and may be wrapped in a quote; the keyword is matched
// case-insensitively.
var charsetRe = regexp.MustCompile(`(?i)charset=["']?([\w-]+)`)

// GetUtf8 downloads url via Get and returns the body converted to a UTF-8
// string.
//
// The source encoding is taken from the first charset declaration found in the
// body; when none is present "utf8" is assumed. statusCode is passed through
// from Get, and content is empty whenever statusCode is not 200. If the
// declared charset has no decoder, the body is returned unconverted rather
// than panicking.
func GetUtf8(url string) (content string, statusCode int) {
	var raw []byte
	raw, statusCode = Get(url)
	if statusCode != 200 {
		return "", statusCode
	}

	charset := "utf8"
	if m := charsetRe.FindSubmatch(raw); m != nil {
		charset = string(m[1])
	}

	decoder := mahonia.NewDecoder(charset)
	if decoder == nil {
		// No decoder is registered for this label (presumably an unknown or
		// misspelled charset — confirm against the mahonia docs). Calling
		// ConvertString on a nil decoder would panic, so fall back to the raw
		// bytes.
		return string(raw), statusCode
	}
	return decoder.ConvertString(string(raw)), statusCode
}

// Patterns used by GetName, compiled once instead of on every call.
var (
	// titleRe matches from "title>" up to the next '<'. Using [^<]* rather
	// than a greedy .* keeps the match inside the title element even when the
	// whole document sits on one line, and lets the title span line breaks.
	titleRe = regexp.MustCompile(`title>[^<]*<`)
	// bookTitleRe matches the text enclosed in 《 》 brackets, brackets included.
	bookTitleRe = regexp.MustCompile(`《.*》`)
	// nameStripRe matches the brackets themselves and any whitespace.
	nameStripRe = regexp.MustCompile(`[《》\s]`)
)

// GetName extracts the name written between 《 and 》 in the title of the
// HTML document content.
//
// The brackets and all whitespace are removed from the result. An empty string
// is returned when the document has no title or the title has no 《》 pair.
func GetName(content string) (name string) {
	title := titleRe.FindString(content)
	bracketed := bookTitleRe.FindString(title)
	return nameStripRe.ReplaceAllString(bracketed, "")
}

// anchorHrefRe captures the href value of anchors written as `<a href="...">`.
// The value stops at the first closing quote, so trailing attributes and other
// anchors on the same line are not swallowed into the URL.
var anchorHrefRe = regexp.MustCompile(`<a href="([^"]*)"`)

// GetAllLinks returns the href values of all `<a href="...">` anchors in
// content whose URL matches keyWord, in document order.
//
// keyWord is accepted both as literal text and, when it is a valid regular
// expression, as a pattern; a link is kept if either form matches its URL.
// This lets a keyword taken from page text (which may contain characters such
// as '(' or '+') be used safely: an invalid pattern no longer panics. An empty
// keyWord matches every link. The result is nil when nothing matches.
func GetAllLinks(content string, keyWord string) (links []string) {
	// QuoteMeta output is always a valid pattern, so MustCompile cannot panic.
	literal := regexp.MustCompile(regexp.QuoteMeta(keyWord))
	pattern, err := regexp.Compile(keyWord)
	if err != nil {
		// Not a valid regular expression: treat it purely as literal text.
		pattern = literal
	}

	for _, m := range anchorHrefRe.FindAllStringSubmatch(content, -1) {
		href := m[1]
		if literal.MatchString(href) || pattern.MatchString(href) {
			links = append(links, href)
		}
	}
	return links
}

// main downloads a hard-coded dy2018.com page, prints the name found in 《》
// brackets in its title, and then prints every link on the page whose URL
// matches that name.
func main() {
	content, statusCode := GetUtf8("http://www.dy2018.com/i/95808.html")
	if statusCode != 200 {
		// fmt.Errorf only builds an error value; its result used to be
		// discarded, so a failed download ended the program silently. Print
		// the message so the failure is visible.
		fmt.Printf("status code: %d\n", statusCode)
		return
	}

	name := GetName(content)
	fmt.Println(name)

	links := GetAllLinks(content, name)
	fmt.Println(links)
}
